#!/usr/bin/env python

import os
import re
import sys
import shutil
import datetime
import subprocess
from os import path
from glob import iglob
from pprint import pprint
from argparse import ArgumentParser
from collections import OrderedDict

from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter

import unidecode
import houdini as h
from tornado import template
from misaka import Markdown, HtmlRenderer, HtmlTocRenderer, SmartyPants, \
    EXT_NO_INTRA_EMPHASIS, EXT_TABLES, EXT_FENCED_CODE, EXT_AUTOLINK, \
    EXT_STRIKETHROUGH, EXT_LAX_SPACING, EXT_SPACE_HEADERS, \
    EXT_SUPERSCRIPT, \
    HTML_SKIP_HTML, HTML_SKIP_STYLE, HTML_SKIP_IMAGES, HTML_SKIP_LINKS, \
    HTML_EXPAND_TABS, HTML_SAFELINK, HTML_TOC, HTML_HARD_WRAP, \
    HTML_USE_XHTML, HTML_ESCAPE


from yaml import load as load_yaml
try:
    from yaml import CLoader as YamlLoader
except ImportError:
    from yaml import Loader as YamlLoader


# Maps the extension names that may be listed under `misaka_extensions` in
# the site configuration to the matching misaka EXT_* flag.  Elyse.__init__
# looks each configured name up here and ORs the flags together.
misaka_extensions = {
    'no_intra_emphasis': EXT_NO_INTRA_EMPHASIS,
    'tables': EXT_TABLES,
    'fenced_code': EXT_FENCED_CODE,
    'autolink': EXT_AUTOLINK,
    'strikethrough': EXT_STRIKETHROUGH,
    'lax_spacing': EXT_LAX_SPACING,
    'space_headers': EXT_SPACE_HEADERS,
    'superscript': EXT_SUPERSCRIPT
}

# Maps the flag names that may be listed under `misaka_html_flags` in the
# site configuration to the matching misaka HTML_* render flag.
# Elyse.__init__ ORs the selected flags together and hands the result to the
# ElyseRenderer instances it creates.
misaka_html_flags = {
    'skip_html': HTML_SKIP_HTML,
    'skip_style': HTML_SKIP_STYLE,
    'skip_images': HTML_SKIP_IMAGES,
    'skip_links': HTML_SKIP_LINKS,
    'expand_tabs': HTML_EXPAND_TABS,
    'safelink': HTML_SAFELINK,
    'toc': HTML_TOC,
    'hard_wrap': HTML_HARD_WRAP,
    'use_xhtml': HTML_USE_XHTML,
    'escape': HTML_ESCAPE
}


# One or more consecutive characters that are not letters, digits or "_".
RE_NON_WORD = re.compile(r'\W+')


def slugify(s):
    """Return a lowercase, URL-friendly version of ``s``.

    The string is handled one ``/``-separated segment at a time so the
    separators survive: each segment is transliterated with unidecode,
    lowercased, and every run of non-word characters becomes a single ``-``.
    """
    slugged = []
    for segment in s.split('/'):
        plain = unidecode.unidecode(segment).lower()
        slugged.append(RE_NON_WORD.sub('-', plain))
    return '/'.join(slugged)


def list_files(dir_path):
    """Yield the path of every non-directory entry below ``dir_path``.

    Subdirectories are descended into at the position where ``os.listdir``
    reports them; the directories themselves are not yielded.
    """
    for entry in os.listdir(dir_path):
        candidate = path.join(dir_path, entry)
        if not path.isdir(candidate):
            yield candidate
            continue
        yield from list_files(candidate)


def parse_yaml(data):
    """Parse the YAML document ``data`` into an ExtendedDict.

    An empty document (or one containing only comments) makes PyYAML return
    None; that case yields an empty ExtendedDict instead of the TypeError
    that ``ExtendedDict(None)`` would raise.

    NOTE(review): YamlLoader is PyYAML's full (C)Loader, which can construct
    arbitrary Python objects.  That is acceptable for the site's own config
    and front matter; switch to a safe loader if untrusted input is ever
    parsed here.
    """
    parsed = load_yaml(data, Loader=YamlLoader)
    if parsed is None:
        return ExtendedDict()
    return ExtendedDict(parsed)


def split_frontmatter(data):
    """Split ``data`` into its parsed front matter and the remaining body.

    The text has to open with a ``---`` line, followed by the front matter
    and a closing ``---``.  Returns ``(parse_yaml(front_matter), body)``, or
    None when ``data`` does not have that shape.
    """
    pattern = r'\A---\s+^(.+?)$\s+---\s*(.*)\Z'
    found = re.match(pattern, data, re.M | re.S)
    if not found:
        return None
    header, body = found.groups()
    return (parse_yaml(header), body)


# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
def format_date(format, date=None):
    """Format ``date`` with the strftime pattern ``format``.

    ``date`` may be any object with a ``strftime`` method (``datetime.date``
    or ``datetime.datetime``).  When it is omitted, the current time in UTC
    is used, as a naive datetime.
    """
    if date is None:
        # datetime.utcnow() is deprecated since Python 3.12.  Take an aware
        # "now" and drop the tzinfo so the result stays a naive UTC value and
        # %z / %Z keep rendering as empty strings.
        date = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    return date.strftime(format)


class ExtendedDict(dict):
    """A dict whose items can also be read and written as attributes.

    Reading an attribute that is not a key yields None instead of raising
    AttributeError; assigning an attribute stores it as a dictionary item.
    """

    def __getattr__(self, name):
        # Only invoked when regular attribute lookup fails, so real dict
        # methods are never shadowed by keys.
        return self.get(name)

    def __setattr__(self, name, value):
        self.__setitem__(name, value)


class CodeHtmlFormatter(HtmlFormatter):
    """HtmlFormatter that wraps its output in ``<pre class="highlight"><code>``."""

    def wrap(self, source, outfile=None):
        """Wrap the highlighted token stream ``source``.

        ``outfile`` is not used.  It is optional because older Pygments
        releases call ``wrap(source, outfile)`` while newer ones call
        ``wrap(source)``; a required second parameter raises TypeError on
        the latter.
        """
        return self._wrap_code(source)

    def _wrap_code(self, source):
        """Yield ``source`` unchanged between the opening and closing tags."""
        yield 0, '<pre class="highlight"><code>'
        for i, t in source:
            yield i, t
        yield 0, '</code></pre>'


# Matches [[title]] and [[title | target]] unless the opening brackets are
# preceded by a backslash.  Group 1 is the title, group 2 the optional target.
RE_WIKILINK = re.compile(r'(?<!\\)\[\[(.*?)(?:\s*\|\s*(.*?))?\]\]')


class ElyseRenderer(HtmlRenderer, SmartyPants):
    """Markdown renderer with template preprocessing, wikilinks and
    Pygments-highlighted code blocks."""

    # Owning Elyse instance; assigned by Elyse.__init__ and used to build
    # wikilink URLs.
    elyse = None

    def _render_str(self, text):
        """Identity hook; Elyse.__init__ replaces it on each instance with
        its own template renderer."""
        return text

    def preprocess(self, document):
        """Run the inherited preprocessing, then pass the text through
        ``_render_str``."""
        document = super().preprocess(document)
        return self._render_str(document)

    def _make_wikilink(self, match):
        """Turn one RE_WIKILINK match into an ``<a>`` element.

        The link points at the target when one is given, otherwise at the
        title.  An empty title produces the placeholder text ``NO TITLE``.
        """
        title, ident = match.groups()
        if not title:
            return 'NO TITLE'
        return '<a href="%s">%s</a>' % (self.elyse._get_url(ident or title), title)

    def postprocess(self, document):
        """Run the inherited postprocessing, then expand wikilinks."""
        document = super().postprocess(document)
        return RE_WIKILINK.sub(self._make_wikilink, document)

    def block_code(self, text, lang):
        """Render a code block, highlighted when ``lang`` names a lexer.

        Blocks without a language, or with a language Pygments does not
        know, are emitted as escaped plain text so that a typo in a fence
        label cannot abort the whole build.
        """
        # pygments is already a dependency of this module; the exception
        # type is imported locally to keep this change self-contained.
        from pygments.util import ClassNotFound

        lexer = None
        if lang:
            try:
                lexer = get_lexer_by_name(lang, stripall=True)
            except ClassNotFound:
                lexer = None

        if lexer is None:
            return '\n<pre class="highlight"><code>%s</code></pre>\n' % h.escape_html(text.strip())

        formatter = CodeHtmlFormatter()
        return highlight(text, lexer, formatter)


class Elyse(object):

    def __init__(self, opts=None):
        """Prepare a generator for one site.

        ``opts`` is a mapping with the keys ``src`` (site source directory)
        and ``dest`` (output directory); both are stored as absolute paths
        in ``self.opts``.  The mapping is copied, so the caller's object is
        left untouched.  Raises KeyError when either key is missing.

        If ``<src>/config.yml`` exists it is loaded as the site
        configuration, otherwise an empty configuration is used.  Templates
        are loaded from ``<src>/templates``.
        """
        # A mutable default ({}) would be shared between instances, and
        # writing the absolute paths back into the caller's dict is a
        # surprising side effect -- work on a private copy instead.
        self.opts = dict(opts or {})
        self.config = ExtendedDict()
        self.opts['src'] = path.abspath(self.opts['src'])
        self.opts['dest'] = path.abspath(self.opts['dest'])

        # Check for configuration file
        config_path = path.join(self.opts['src'], 'config.yml')
        if path.exists(config_path):
            print('>> Load config.yml')
            with open(config_path, 'r') as fd:
                self.config = parse_yaml(fd.read())
        else:
            print('>> config.yml not found')

        # Data collected by _posts() and handed to page/tag/archive templates
        self._data = ExtendedDict({
            'posts': [],
            'archive': OrderedDict(),
            'tags': ExtendedDict()
        })

        # Template loader
        self._loader = template.Loader(path.join(self.opts['src'], 'templates'), autoescape=None)

        # Markdown: combine the extensions and HTML flags named in the config
        extensions = 0
        if 'misaka_extensions' in self.config:
            for name in self.config.misaka_extensions:
                extensions |= misaka_extensions[name]

        flags = 0
        if 'misaka_html_flags' in self.config:
            for name in self.config.misaka_html_flags:
                flags |= misaka_html_flags[name]

        renderer = ElyseRenderer(flags)
        renderer.elyse = self
        renderer._render_str = self._render_str
        self._markdown = Markdown(renderer, extensions)

        # Renderers for pages that request a table of contents: the same
        # flags plus HTML_TOC.  (The former `if flags ^ HTML_TOC` guard was
        # true for every value except HTML_TOC itself, so OR-ing
        # unconditionally gives the same result.)
        toc_flags = flags | HTML_TOC

        renderer_toc = ElyseRenderer(toc_flags)
        renderer_toc.elyse = self
        renderer_toc._render_str = self._render_str
        self._toc = Markdown(HtmlTocRenderer(), extensions)
        self._markdown_toc = Markdown(renderer_toc, extensions)

        # Default template variables
        self._defaults = {
            'site': self.config,
            'url': self._get_url,
            'asset': self._get_asset,
            'markdown': self._render_md,
            'date': format_date
        }

    def _assets(self):
        src = path.join(self.opts['src'], 'assets')
        dest = path.join(self.opts['dest'], 'assets')
        if not path.exists(src):
            return

        print('>> Assets')
        shutil.copytree(src, dest)

    def _posts(self):
        """Render every post found in ``<src>/posts`` and index it.

        Post files are named ``YYYY-MM-DD <title>.md`` and must start with a
        YAML front matter block.  Each post is written to
        ``<dest>/YYYY/MM/DD/<slug>/index.html`` and appended to
        ``self._data.posts`` in reverse file-name order.  It is also added
        to the archive and tag indexes when ``archive_layout`` /
        ``tag_layout`` are present in the site configuration.

        Files whose name does not match, whose date is impossible, or that
        lack front matter are skipped.
        """
        src = path.join(self.opts['src'], 'posts')
        dest = self.opts['dest']
        if not path.exists(src):
            return

        print('>> Posts')

        # File names start with the date, so reverse lexical order puts the
        # newest post first.
        for fn in sorted(os.listdir(src), reverse=True):
            # Match: "year-month-day title.md".  Anchored at the end so that
            # editor backups such as "....md~" or "....md.bak" are ignored.
            match = re.match(r'([0-9]{4})-([0-9]{2})-([0-9]{2})\s+(.*?)\.md\Z', fn)
            if not match:
                continue
            year, month, day, title = match.groups()
            src_fn = path.join(src, fn)

            try:
                post_date = datetime.date(int(year), int(month), int(day))
            except ValueError:
                # e.g. month 13 -- skip the file rather than abort the build
                print('.. Invalid post date: %s' % src_fn)
                continue

            dest_dir = path.join(dest, year, month, day, slugify(title))
            dest_fn = path.join(dest_dir, 'index.html')

            # Render post
            with open(src_fn, 'r') as fd:
                raw_data = fd.read()

            data = split_frontmatter(raw_data)
            if data is None:
                print('.. Invalid post: %s' % src_fn)
                continue

            post, content = data
            if 'layout' not in post:
                post.layout = 'post.html'

            # Last modification date of the post
            post.mdate = datetime.datetime.fromtimestamp(os.stat(src_fn).st_mtime)

            post.url = '%s' % '/'.join(match.groups())
            post.date = post_date
            post.content = self._render_md(content)
            output = self._render_tpl(post.layout, post=post)

            # Write
            os.makedirs(dest_dir)
            with open(dest_fn, 'wb') as fd:
                fd.write(output)

            # Add to posts
            self._data.posts.append(post)

            # Add to archive
            if 'archive_layout' in self.config:
                if post.date.year not in self._data.archive:
                    self._data.archive[post.date.year] = []
                self._data.archive[post.date.year].append(post)

            # Add to tags
            if 'tag_layout' in self.config:
                # A post without a `tags` entry yields None; a single scalar
                # tag must not be iterated character by character.
                tags = post.tags or []
                if isinstance(tags, str):
                    tags = [tags]
                for tag in tags:
                    if tag not in self._data.tags:
                        self._data.tags[tag] = ExtendedDict({
                            'title': tag,
                            'posts': []
                        })
                    self._data.tags[tag].posts.append(post)

    def _pages(self):
        """Render every file below ``<src>/pages`` into the destination.

        ``.md`` files need front matter with a ``layout`` entry; they are
        rendered through Markdown (with a table of contents when the front
        matter sets ``toc: true``) and then through their layout template.
        Any other file is rendered directly as a template string.

        A page whose slugified name is listed in the ``root`` setting of the
        site configuration is written to ``<dest>/<name><ext>`` (``.md``
        becomes ``.html``); every other page goes to
        ``<dest>/<name>/index.html``.
        """
        src = path.join(self.opts['src'], 'pages')
        dest = self.opts['dest']
        if not path.exists(src):
            return

        print('>> Pages')

        # `root` is optional; a missing entry reads as None, and `in None`
        # would raise TypeError on the first page.
        root_names = self.config.root or ()

        for fn in list_files(src):
            name, ext = path.splitext(fn.replace(src, '', 1).strip('/\\'))
            name = slugify(name)

            with open(fn, 'r') as fd:
                raw_data = fd.read()

            if ext == '.md':
                data = split_frontmatter(raw_data)
                if data is None:
                    print('.. Invalid page: %s%s' % (name, ext))
                    continue

                page, content = data
                if 'layout' not in page:
                    print('.. No layout specified: %s%s' % (name, ext))
                    continue

                if page.toc is True:
                    page.toc, page.content = self._render_md_toc(content)
                else:
                    page.content = self._render_md(content)

                output = self._render_tpl(page.layout, page=page,
                    posts=self._data.posts,
                    archive=self._data.archive,
                    tags=self._data.tags)
            else:
                output = self._render_str(raw_data,
                    posts=self._data.posts,
                    archive=self._data.archive,
                    tags=self._data.tags)

            # Check if the file needs to be placed in the root
            if name in root_names:
                if ext == '.md':
                    ext = '.html'

                dest_fn = path.join(dest, '%s%s' % (name, ext))
                # A root page may still live in a subdirectory of pages/
                dest_dir = path.dirname(dest_fn)
            else:
                dest_dir = path.join(dest, name)
                dest_fn = path.join(dest_dir, 'index.html')

            if not path.exists(dest_dir):
                os.makedirs(dest_dir)

            # Write page
            with open(dest_fn, 'wb') as fd:
                fd.write(output)

    def _tags(self):
        if not self._data.tags or 'tag_layout' not in self.config:
            return

        print('>> Tags')
        dest = path.join(self.opts['dest'], 'tags')

        for tag, data in self._data.tags.items():
            output = self._render_tpl(self.config.tag_layout, tag=data)
            os.makedirs(path.join(dest, tag))
            with open(path.join(dest, tag, 'index.html'), 'wb') as fd:
                fd.write(output)

    def _archive(self):
        if 'archive_layout' not in self.config:
            return

        print('>> Archive')
        output = self._render_tpl(self.config.archive_layout,
            archive=self._data.archive)

        os.makedirs(path.join(self.opts['dest'], 'archive'))
        with open(path.join(self.opts['dest'], 'archive', 'index.html'), 'wb') as fd:
            fd.write(output)

    def _pre_scripts(self):
        if 'pre_scripts' not in self.config:
            return

        print('>> Pre-scripts')
        for script in self.config.pre_scripts:
            subprocess.Popen(path.join(self.opts['src'], 'scripts', script),
                cwd=self.opts['dest'], shell=True)

    def _post_scripts(self):
        if 'post_scripts' not in self.config:
            return

        print('>> Post-scripts')
        for script in self.config.post_scripts:
            subprocess.Popen(path.join(self.opts['src'], 'scripts', script),
                cwd=self.opts['dest'], shell=True)

    def _get_asset(self, path):
        return '%s/assets/%s' % (self.config.base_url, path)

    def _get_url(self, path=''):
        """Return the site URL for ``path``, which is slugified first.

        When the site configuration has no ``base_url`` (it then reads as
        None) a root-relative URL is returned instead of one starting with
        the literal text ``None``.
        """
        base_url = self.config.base_url or ''
        return '%s/%s' % (base_url, slugify(path))

    def _render_md(self, data):
        return self._markdown.render(data)

    def _render_md_toc(self, data):
        return (
            self._toc.render(data),
            self._markdown_toc.render(data)
        )

    def _render_toc(self, data):
        return self._markdown_toc.render(data)

    def _render_str(self, template_string, **kwargs):
        """Render ``template_string`` as a tornado template.

        The template sees the default variables plus ``kwargs``; on a name
        clash ``kwargs`` wins.  The defaults themselves are not modified, so
        variables passed for one render do not leak into later ones.
        """
        # Merge into a fresh dict: updating self._defaults in place made
        # every later render see this call's keyword arguments.
        namespace = dict(self._defaults)
        namespace.update(kwargs)

        t = template.Template(template_string, loader=self._loader, autoescape=None)
        return t.generate(**namespace)

    def _render_tpl(self, template, **kwargs):
        defaults = self._defaults
        defaults.update(kwargs)

        return self._loader.load(template).generate(**defaults)

    def generate(self):
        # Cleanup before writing
        if path.exists(self.opts['dest']):
            for p in os.listdir(self.opts['dest']):
                p = path.join(self.opts['dest'], p)
                if path.isdir(p):
                    shutil.rmtree(p)
                else:
                    os.remove(p)

        # Generate~
        self._pre_scripts()
        self._assets()
        self._posts()
        self._pages()
        self._tags()
        self._archive()
        self._post_scripts()


def main(args):
    """Parse the command-line arguments ``args`` and generate the site.

    Two optional positional arguments are accepted: the source directory
    (default ``.``) and the destination directory (default ``build``).
    """
    parser = ArgumentParser(description='A static blog generator.')

    positionals = (
        ('src', '.', 'source',
         'the location %(prog)s looks for source files.'),
        ('dest', 'build', 'destination',
         'the location %(prog)s outputs to.'),
    )
    for arg_name, default, metavar, help_text in positionals:
        parser.add_argument(arg_name, default=default, nargs='?',
                            metavar=metavar, help=help_text)

    namespace = parser.parse_args(args)
    Elyse(vars(namespace)).generate()


if __name__ == '__main__':
    main(sys.argv[1:])
